# Root directory of the network results; the per-structure input file is
# looked up underneath it.
net_dir <- "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/3rd_pass/snakemake-sn/results/"

Clusters with gene names

# Macro structure to report on, taken from the report parameter `cell_type`.
# It can be a cell type, metabolites, or a region of the brain.
macro_type <- params$cell_type
# Fail early with a clear message: the input path below is built from this
# value, so it must be exactly one string-like element.
if (length(macro_type) != 1L || is.na(macro_type)) {
  stop("`params$cell_type` must be a single non-missing value.", call. = FALSE)
}
message("Cell type: ", macro_type)
## Cell type: opc
# Minimum number of nodes a cluster needs to be counted in the summaries.
min_clust <- 30
# Gene-by-cluster table for the selected macro structure.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
cluster_codes_df <- read.table(
  paste0(net_dir, "/", macro_type, "/geneBycluster.txt"),
  header = TRUE,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
message("Number of unique genes: ", length(unique(cluster_codes_df$ensembl)))
## Number of unique genes: 15255
createDT(cluster_codes_df)

Number of genes by cluster

Clusters level 1

# Level 1: number of nodes (genes) per cluster.
count1 <- as.data.frame(table(cluster_codes_df$cluster_lv1))
colnames(count1) <- c("cluster", "n_nodes")
total_nodes <- sum(count1$n_nodes)
# One inclusive (>=) mask drives both figures below, so "at least min_clust"
# means the same thing for the cluster count and for the gene total. The
# original counted clusters with a strict `>`, silently dropping clusters of
# exactly min_clust nodes from the count while still counting their genes.
is_large <- count1$n_nodes >= min_clust
nodes_in_cluster <- sum(count1$n_nodes[is_large])
# The threshold is printed from min_clust instead of a hard-coded "30".
message("Number of clusters with at least ", min_clust, " nodes: ", sum(is_large))
## Number of clusters with at least 30 nodes: 3
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ",
  nodes_in_cluster, ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 15270. Percentage: 100% of the genes are assigned to a cluster.
createDT(count1)

Clusters level 2

# Level 2: number of nodes (genes) per cluster.
count2 <- as.data.frame(table(cluster_codes_df$cluster_lv2))
colnames(count2) <- c("cluster", "n_nodes")
total_nodes <- sum(count2$n_nodes)
# Clusters that reach the size threshold (inclusive); computed once and reused
# for both the cluster count and the gene total.
is_large <- count2$n_nodes >= min_clust
nodes_in_cluster <- sum(count2$n_nodes[is_large])
# The threshold is printed from min_clust so the text cannot drift from the
# filter if min_clust is changed.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(is_large))
## Number of clusters with at least 30 nodes: 9
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ",
  nodes_in_cluster, ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 15270. Percentage: 100% of the genes are assigned to a cluster.
createDT(count2)

Clusters level 3

# Level 3: number of nodes (genes) per cluster.
count3 <- as.data.frame(table(cluster_codes_df$cluster_lv3))
colnames(count3) <- c("cluster", "n_nodes")
total_nodes <- sum(count3$n_nodes)
# Clusters that reach the size threshold (inclusive); computed once and reused
# for both the cluster count and the gene total.
is_large <- count3$n_nodes >= min_clust
nodes_in_cluster <- sum(count3$n_nodes[is_large])
# The threshold is printed from min_clust so the text cannot drift from the
# filter if min_clust is changed.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(is_large))
## Number of clusters with at least 30 nodes: 28
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ",
  nodes_in_cluster, ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 15186. Percentage: 99.4499017681729% of the genes are assigned to a cluster.
createDT(count3)

Clusters level 4

# Level 4: number of nodes (genes) per cluster.
count4 <- as.data.frame(table(cluster_codes_df$cluster_lv4))
colnames(count4) <- c("cluster", "n_nodes")
total_nodes <- sum(count4$n_nodes)
# Clusters that reach the size threshold (inclusive); computed once and reused
# for both the cluster count and the gene total.
is_large <- count4$n_nodes >= min_clust
nodes_in_cluster <- sum(count4$n_nodes[is_large])
# The threshold is printed from min_clust so the text cannot drift from the
# filter if min_clust is changed.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(is_large))
## Number of clusters with at least 30 nodes: 77
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ",
  nodes_in_cluster, ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 13441. Percentage: 88.022265880812% of the genes are assigned to a cluster.
createDT(count4)

Clusters level 5

# Level 5: number of nodes (genes) per cluster.
count5 <- as.data.frame(table(cluster_codes_df$cluster_lv5))
colnames(count5) <- c("cluster", "n_nodes")
total_nodes <- sum(count5$n_nodes)
# Clusters that reach the size threshold (inclusive); computed once and reused
# for both the cluster count and the gene total.
is_large <- count5$n_nodes >= min_clust
nodes_in_cluster <- sum(count5$n_nodes[is_large])
# The threshold is printed from min_clust so the text cannot drift from the
# filter if min_clust is changed.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(is_large))
## Number of clusters with at least 30 nodes: 66
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ",
  nodes_in_cluster, ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 4072. Percentage: 26.6666666666667% of the genes are assigned to a cluster.
createDT(count5)

Session info

# Report the R version, platform, locale and package versions of this session,
# so the run can be reproduced.
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggeasy_0.1.3     readxl_1.3.1     kableExtra_1.3.4 R.matlab_3.6.2   forcats_0.5.1    stringr_1.4.0    dplyr_1.0.8      purrr_0.3.4      readr_2.1.2      tidyr_1.2.0     
## [11] tibble_3.1.6     tidyverse_1.3.1  limma_3.50.1     ggfortify_0.4.14 ggplot2_3.3.5   
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.8        svglite_2.1.0     lubridate_1.8.0   assertthat_0.2.1  digest_0.6.29     utf8_1.2.2        R6_2.5.1          cellranger_1.1.0  backports_1.4.1  
## [10] reprex_2.0.1      evaluate_0.15     httr_1.4.2        pillar_1.7.0      rlang_1.0.1       rstudioapi_0.13   jquerylib_0.1.4   R.utils_2.11.0    R.oo_1.24.0      
## [19] DT_0.20           rmarkdown_2.11    webshot_0.5.2     htmlwidgets_1.5.4 munsell_0.5.0     broom_0.7.12      compiler_4.1.2    modelr_0.1.8      xfun_0.29        
## [28] systemfonts_1.0.4 pkgconfig_2.0.3   htmltools_0.5.2   tidyselect_1.1.2  gridExtra_2.3     viridisLite_0.4.0 fansi_1.0.2       crayon_1.5.0      tzdb_0.2.0       
## [37] dbplyr_2.1.1      withr_2.4.3       R.methodsS3_1.8.1 grid_4.1.2        jsonlite_1.7.3    gtable_0.3.0      lifecycle_1.0.1   DBI_1.1.2         magrittr_2.0.2   
## [46] scales_1.1.1      cli_3.2.0         stringi_1.7.6     fs_1.5.2          xml2_1.3.3        bslib_0.3.1       ellipsis_0.3.2    generics_0.1.2    vctrs_0.3.8      
## [55] tools_4.1.2       glue_1.6.1        crosstalk_1.2.0   hms_1.1.1         fastmap_1.1.0     yaml_2.3.5        colorspace_2.0-3  rvest_1.0.2       knitr_1.37       
## [64] haven_2.4.3       sass_0.4.0